Chapter 4 Community composition

load("data/data.Rdata")
quality <- read_tsv("results/quality.tsv")
Rows: 192 Columns: 2
── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: "\t"
chr (1): microsample
dbl (1): quality

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

4.1 Taxonomy barplot

4.1.1 Positive samples, coverage-filtered

#Get phylum colors from the EHI standard
phylum_colors <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
    right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
    arrange(match(genome, genome_tree$tip.label)) %>%
    select(phylum, colors) %>% 
    unique() %>%
    arrange(phylum) %>%
    select(colors) %>%
    pull()
Rows: 202 Columns: 2
── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: "\t"
chr (2): phylum, colors

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
  pivot_longer(-genome, names_to = "microsample", values_to = "count") %>% #reduce to minimum number of columns
  left_join(genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
  left_join(sample_metadata, by = join_by(microsample == microsample)) %>% #append sample metadata
  left_join(quality, by = join_by(microsample == microsample)) %>% #append sample metadata
  mutate(section=unlist(section)) %>%
  filter(!is.na(count)) %>%
  filter(count > 0) %>%
  filter(section != "Ileum") %>%
  filter(type == "Positive") %>%
  filter(quality >= 4) %>%
  ggplot(., aes(x=count,y=microsample, fill=phylum, group=phylum)) + #grouping enables keeping the same sorting of taxonomic units
    geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
    scale_fill_manual(values=phylum_colors[-4]) +
    labs(x = "Relative abundance", y="Microsamples") +
    facet_nested(cryosection  ~ .,  scales="free_y") + #facet per day and treatment
 guides(fill = guide_legend(ncol = 1)) +
    theme(strip.text.y = element_text(angle = 0),
          axis.title.x = element_blank(),
          panel.background = element_blank(),
          panel.border = element_blank(),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
          panel.spacing = unit(0, "lines")) +
   labs(fill="Phylum")

genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
  pivot_longer(-genome, names_to = "microsample", values_to = "count") %>% #reduce to minimum number of columns
  left_join(genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
  left_join(sample_metadata, by = join_by(microsample == microsample)) %>% #append sample metadata
  left_join(quality, by = join_by(microsample == microsample)) %>% #append sample metadata
  mutate(section=unlist(section)) %>%
  filter(!is.na(count)) %>%
  filter(count > 0) %>%
  filter(section != "Ileum") %>%
  filter(type == "Positive") %>%
  filter(quality >= 4) %>%
  mutate(genome=factor(genome,levels=genome_tree$tip.label)) %>% 
  ggplot(aes(x=count, y=microsample, fill=genus, group=genus)) + #grouping enables keeping the same sorting of taxonomic units
    geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
    labs(x = "Relative abundance", y="Microsamples") +
    facet_nested(cryosection  ~ .,  scales="free_y") + #facet per day and treatment
 guides(fill = guide_legend()) +
    theme(strip.text.y = element_text(angle = 0),
          axis.title.x = element_blank(),
          panel.background = element_blank(),
          panel.border = element_blank(),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
          panel.spacing = unit(0, "lines"),
          legend.position="bottom") +
   labs(fill="Genus")

4.1.2 Positive samples, coverage-unfiltered

#Get phylum colors from the EHI standard
phylum_colors <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
    right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
    arrange(match(genome, genome_tree$tip.label)) %>%
    select(phylum, colors) %>% 
    unique() %>%
    arrange(phylum) %>%
    select(colors) %>%
    pull()
Rows: 202 Columns: 2
── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: "\t"
chr (2): phylum, colors

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
genome_counts %>%
  mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
  pivot_longer(-genome, names_to = "microsample", values_to = "count") %>% #reduce to minimum number of columns
  left_join(genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
  left_join(sample_metadata, by = join_by(microsample == microsample)) %>% #append sample metadata
  left_join(quality, by = join_by(microsample == microsample)) %>% #append sample metadata
  mutate(section=unlist(section)) %>%
  filter(!is.na(count)) %>%
  filter(count > 0) %>%
  filter(section != "Ileum") %>%
  filter(type == "Positive") %>%
  filter(quality >= 4) %>%
  ggplot(., aes(x=count,y=microsample, fill=phylum, group=phylum)) + #grouping enables keeping the same sorting of taxonomic units
    geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
    scale_fill_manual(values=phylum_colors) +
    labs(x = "Relative abundance", y="Microsamples") +
    facet_nested(cryosection ~ .,  scales="free_y", space="free") + #facet per day and treatment
 guides(fill = guide_legend(ncol = 1)) +
    theme(strip.text.y = element_text(angle = 0),
          axis.title.x = element_blank(),
          panel.background = element_blank(),
          panel.border = element_blank(),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
          panel.spacing = unit(0, "lines")) +
   labs(fill="Phylum")

4.1.3 Control samples, coverage-unfiltered

#Get phylum colors from the EHI standard
phylum_colors <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
    right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
    arrange(match(genome, genome_tree$tip.label)) %>%
    #filter(phylum %in% c("p__Actinomycetota","p__Bacillota","p__Bacillota_A","p__Pseudomonadota","p__Verrucomicrobiota")) %>%
    select(phylum, colors) %>% 
    unique() %>%
    arrange(phylum) %>%
    select(colors) %>%
    pull()
Rows: 202 Columns: 2
── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: "\t"
chr (2): phylum, colors

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
genome_counts %>%
  mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
  pivot_longer(-genome, names_to = "microsample", values_to = "count") %>% #reduce to minimum number of columns
  left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
  left_join(., sample_metadata, by = join_by(microsample == microsample)) %>% #append sample metadata
  filter(is.na(Xcoord)) %>%
  filter(type %in% c("NegativeMembrane","NegativeCollection","NegativeReaction")) %>%
  ggplot(., aes(x=count,y=microsample, fill=phylum, group=phylum)) + #grouping enables keeping the same sorting of taxonomic units
    geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
    scale_fill_manual(values=phylum_colors) +
    labs(x = "Relative abundance", y="Membrane controls") +
    facet_nested(cryosection + type ~ .,  scales="free_y") + #facet per day and treatment
 guides(fill = guide_legend(ncol = 1)) +
    theme(strip.text.y = element_text(angle = 0),
          axis.text.y = element_blank(),
          axis.title.x = element_blank(),
          panel.background = element_blank(),
          panel.border = element_blank(),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
          panel.spacing = unit(0, "lines")) +
   labs(fill="Phylum")

4.2 MAG overview

vertical_tree <- force.ultrametric(genome_tree,method="extend") %>%
        ggtree(., size = 0.3)
***************************************************************
*                          Note:                              *
*    force.ultrametric does not include a formal method to    *
*    ultrametricize a tree & should only be used to coerce    *
*   a phylogeny that fails is.ultrametric due to rounding --  *
*    not as a substitute for formal rate-smoothing methods.   *
***************************************************************
Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
Also defined by 'tidytree'
phylum_colors_heatmap <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
  right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
    arrange(match(genome, genome_tree$tip.label)) %>%
    mutate(phylum = factor(phylum, levels = unique(phylum))) %>%
    column_to_rownames(var = "genome") %>%
    select(phylum)
Rows: 202 Columns: 2
── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: "\t"
chr (2): phylum, colors

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
vertical_tree <- gheatmap(vertical_tree, phylum_colors_heatmap, offset=-0.3, width=0.1, colnames=FALSE) +
    scale_fill_manual(values=phylum_colors) +
    new_scale_fill()
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Scale for fill is already present.
Adding another scale for fill, which will replace the existing scale.
sample_selection <- sample_metadata %>%
      filter(!is.na(Xcoord)) %>%
      left_join(quality, by=join_by(microsample==microsample)) %>%
      filter(quality>=4) %>% select(microsample) %>% pull()

genome_counts_selected <- genome_counts_filt %>%
          select(all_of(c("genome",sample_selection))) %>% column_to_rownames(var="genome") %>% tss()

vertical_tree <- gheatmap(vertical_tree, genome_counts_selected, offset=-0.2, width=0.5, colnames=FALSE, colnames_angle=90, font.size=3, colnames_position="top", colnames_offset_y = 15) +
    vexpand(.08) +
    coord_cartesian(clip = "off") +
    scale_fill_gradient(low = "#f4f4f4", high = "#315b7d", na.value="#f4f4f4") +
    new_scale_fill()
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Scale for fill is already present.
Adding another scale for fill, which will replace the existing scale.
vertical_tree

mag_summary <- genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
  pivot_longer(-genome, names_to = "microsample", values_to = "count") %>% #reduce to minimum number of columns
  left_join(sample_metadata, by = join_by(microsample == microsample)) %>% #append sample metadata
  left_join(genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
  left_join(quality, by = join_by(microsample == microsample)) %>% #append sample metadata
  filter(quality>=5) %>%
  group_by(microsample,cryosection,phylum,genome) %>%
  summarise(relabun=sum(count)) %>%
    group_by(genome) %>%
    summarise(mean=mean(relabun, na.rm=T),sd=sd(relabun, na.rm=T)) %>% 
    arrange(-mean) 

mag_summary %>%
    slice(1:30) %>% 
    tt()
tinytable_fkcjgmcxl0hb6f0310at
genome mean sd
TG5_35:bin_000001 0.134322212 0.0184322215
TG5_28:bin_000004 0.073959723 0.0140085108
D300514:bin_000001 0.066013869 0.0034101332
D300503:bin_000003 0.058390022 0.0073507361
D300499:bin_000015 0.047828161 0.0077144657
GPB:bin_000131 0.045153840 0.0078676163
GPB:bin_000128 0.038586933 0.0040052884
GPB:bin_000067 0.035675249 0.0065953450
D300464:bin_000017 0.034341911 0.0031772466
D300513:bin_000002 0.033892021 0.0030309458
GPB:bin_000035 0.025451423 0.0039111919
GPB:bin_000098 0.024901976 0.0036591647
GPB:bin_000150 0.022771123 0.0053096327
GPB:bin_000180 0.016388510 0.0009179975
GPB:bin_000100 0.015671471 0.0026522201
GPB:bin_000051 0.014464197 0.0022369472
GPB:bin_000071 0.013664129 0.0010044884
GPB:bin_000146 0.013104024 0.0032609807
GPB:bin_000177 0.012317879 0.0006833931
GPB:bin_000110 0.011947547 0.0023342738
GPB:bin_000096 0.010819631 0.0036674499
GPB:bin_000008 0.010799219 0.0019382795
D300479:bin_000012 0.010381828 0.0009176369
GPB:bin_000097 0.010345378 0.0010747212
GPB:bin_000121 0.009323384 0.0012855502
D300479:bin_000001 0.009284298 0.0030708565
TG5:bin_000047 0.009010480 0.0009725334
GPB:bin_000092 0.008360823 0.0008390804
GPB:bin_000161 0.008117754 0.0009590991
TG5_21:bin_000003 0.006980248 0.0072282716
genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
  pivot_longer(-genome, names_to = "microsample", values_to = "count") %>% #reduce to minimum number of columns
  left_join(sample_metadata, by = join_by(microsample == microsample)) %>% #append sample metadata
  left_join(genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
  left_join(quality, by = join_by(microsample == microsample)) %>% #append sample metadata
  filter(quality>=4) %>%
    mutate(genome=factor(genome,levels=rev(mag_summary %>% pull(genome)))) %>%
    filter(count > 0) %>%
    ggplot(aes(x=count, y=genome, group=genome, color=phylum)) +
        scale_color_manual(values=phylum_colors) +
        geom_jitter(alpha=0.3) + 
        facet_grid(.~cryosection)+
        theme_minimal() + 
        labs(y="Family", x="Relative abundance", color="Phylum")

4.3 Genus overview

genus_summary <- genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
  pivot_longer(-genome, names_to = "microsample", values_to = "count") %>% #reduce to minimum number of columns
  left_join(sample_metadata, by = join_by(microsample == microsample)) %>% #append sample metadata
  left_join(genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
  left_join(quality, by = join_by(microsample == microsample)) %>% #append sample metadata
  filter(quality>=4) %>%
  group_by(microsample,cryosection,phylum,genus) %>%
  summarise(relabun=sum(count)) %>%
  filter(genus != "g__") %>%
  mutate(genus= sub("^g__", "", genus))

genus_summary_sort <- genus_summary %>%
    group_by(genus) %>%
    summarise(mean=mean(relabun, na.rm=T),sd=sd(relabun, na.rm=T)) %>%
    arrange(-mean)

genus_summary_sort %>%
    tt()
tinytable_tufxruwvex6h822iocyb
genus mean sd
Streptococcus 1.606213e-01 0.1212753087
Eisenbergiella 1.220605e-01 0.0238130945
Lactobacillus 9.747618e-02 0.0376004834
Mediterraneibacter 9.595320e-02 0.0224881290
Fimenecus 6.118323e-02 0.0213529576
Caccovicinus 6.033000e-02 0.0169008643
Blautia 5.026773e-02 0.0130645964
Gallimonas 4.465074e-02 0.0124703326
Pelethomonas 4.106568e-02 0.0118553205
Choladocola 3.986651e-02 0.0092494723
Agathobaculum 3.068570e-02 0.0124206142
Anaerobutyricum 2.211240e-02 0.0070097755
Acutalibacter 1.912411e-02 0.0073248590
UBA1417 1.296980e-02 0.0067630443
Faecousia 1.154478e-02 0.0045572982
Negativibacillus 1.140602e-02 0.0041768322
Lawsonibacter 1.016180e-02 0.0060676472
UMGS1370 8.845382e-03 0.0030934368
Lachnoclostridium_A 8.304476e-03 0.0055131868
Flavonifractor 7.623195e-03 0.0043770472
Thomasclavelia 6.024292e-03 0.0103158677
Rubneribacter 5.883512e-03 0.0041559833
Enterocloster 5.597011e-03 0.0033959785
HGM12545 5.533541e-03 0.0028478099
Sellimonas 5.243184e-03 0.0029443141
Merdisoma 4.846626e-03 0.0037545844
Scatosoma 4.652718e-03 0.0024925991
Copromonas 4.210315e-03 0.0024423900
Anaeromassilibacillus 4.000736e-03 0.0069808274
Merdivicinus 3.824202e-03 0.0026857651
Pullichristensenella 3.729579e-03 0.0024806005
Intestinimonas 3.454784e-03 0.0028426937
Enterenecus 3.433201e-03 0.0100625004
Scatomorpha 3.413170e-03 0.0024259276
Lachnoclostridium_B 3.203630e-03 0.0022888898
Anaerostipes 2.544495e-03 0.0024493868
Dysosmobacter 2.106625e-03 0.0019183135
Escherichia 1.893296e-03 0.0017959422
Massilimicrobiota 1.809809e-03 0.0020983661
Faecivivens 1.809083e-03 0.0018827249
Akkermansia 9.764831e-04 0.0012854307
Borkfalkia 7.893354e-04 0.0011905061
Roslinia 6.379381e-04 0.0010859594
Ruthenibacterium 5.165143e-04 0.0008778587
Romboutsia 4.974542e-04 0.0009965553
Anaerotignum 4.953307e-04 0.0009408635
Ornithomonoglobus 4.497739e-04 0.0008517372
Scatavimonas 3.891001e-04 0.0007677824
Evtepia 3.516998e-04 0.0008301186
Gallacutalibacter 3.228169e-04 0.0008048318
Merdimonas 2.761897e-04 0.0009029656
HGM12998 1.487547e-04 0.0004443756
UBA4716 1.425431e-04 0.0004271822
Clostridium_Q 6.620711e-05 0.0003523477
Clostridium_AQ 3.751594e-05 0.0001967550
Anaerotruncus 3.682229e-05 0.0001941056
Gordonibacter 1.624910e-05 0.0001215971
Acetatifactor 0.000000e+00 0.0000000000
Alangreenwoodia 0.000000e+00 0.0000000000
Alistipes 0.000000e+00 0.0000000000
An181 0.000000e+00 0.0000000000
Angelakisella 0.000000e+00 0.0000000000
Avimicrobium 0.000000e+00 0.0000000000
Avoscillospira 0.000000e+00 0.0000000000
Bifidobacterium 0.000000e+00 0.0000000000
Blautia_A 0.000000e+00 0.0000000000
Butyricicoccus 0.000000e+00 0.0000000000
CAG-245 0.000000e+00 0.0000000000
CAG-269 0.000000e+00 0.0000000000
CAG-273 0.000000e+00 0.0000000000
CAG-302 0.000000e+00 0.0000000000
CAG-313 0.000000e+00 0.0000000000
CAJFPI01 0.000000e+00 0.0000000000
CAJFUH01 0.000000e+00 0.0000000000
Caccenecus 0.000000e+00 0.0000000000
Caccomorpha 0.000000e+00 0.0000000000
Caccousia 0.000000e+00 0.0000000000
Catenibacillus 0.000000e+00 0.0000000000
Coprocola 0.000000e+00 0.0000000000
Coproplasma 0.000000e+00 0.0000000000
Egerieicola 0.000000e+00 0.0000000000
Enterococcus 0.000000e+00 0.0000000000
Enterococcus_B 0.000000e+00 0.0000000000
Enterococcus_D 0.000000e+00 0.0000000000
Eubacterium_R 0.000000e+00 0.0000000000
Faecalibacterium 0.000000e+00 0.0000000000
Faeciplasma 0.000000e+00 0.0000000000
Fimicola 0.000000e+00 0.0000000000
Fimimorpha 0.000000e+00 0.0000000000
Fimivicinus 0.000000e+00 0.0000000000
Fournierella 0.000000e+00 0.0000000000
Gallispira 0.000000e+00 0.0000000000
Galloscillospira_A 0.000000e+00 0.0000000000
Gemmiger 0.000000e+00 0.0000000000
Harrysmithimonas 0.000000e+00 0.0000000000
Heritagella 0.000000e+00 0.0000000000
Heteroclostridium 0.000000e+00 0.0000000000
Holdemania 0.000000e+00 0.0000000000
Hungatella_B 0.000000e+00 0.0000000000
JAETTH01 0.000000e+00 0.0000000000
Klebsiella 0.000000e+00 0.0000000000
Ligilactobacillus 0.000000e+00 0.0000000000
Limosilactobacillus 0.000000e+00 0.0000000000
Merdibacter 0.000000e+00 0.0000000000
Metalachnospira 0.000000e+00 0.0000000000
Neoanaerotignum_A 0.000000e+00 0.0000000000
Onthovicinus 0.000000e+00 0.0000000000
Ornithomonoglobus_A 0.000000e+00 0.0000000000
Paenibacillus_A 0.000000e+00 0.0000000000
Pararuminococcus 0.000000e+00 0.0000000000
Proteus 0.000000e+00 0.0000000000
Pseudobutyricicoccus 0.000000e+00 0.0000000000
Pullilachnospira 0.000000e+00 0.0000000000
RUG591 0.000000e+00 0.0000000000
RUG626 0.000000e+00 0.0000000000
Ruminococcus_G 0.000000e+00 0.0000000000
Salmonella 0.000000e+00 0.0000000000
Sarcina 0.000000e+00 0.0000000000
Scybalenecus 0.000000e+00 0.0000000000
Spyradocola 0.000000e+00 0.0000000000
Timburyella 0.000000e+00 0.0000000000
Tyzzerella 0.000000e+00 0.0000000000
UMGS775 0.000000e+00 0.0000000000
UMGS856 0.000000e+00 0.0000000000
genus_arrange <- genus_summary %>%
    group_by(genus) %>%
    summarise(mean=sum(relabun)) %>%
    filter(genus != "g__")%>%
    arrange(-mean) %>%
    select(genus) %>%
    mutate(genus= sub("^g__", "", genus)) %>%
    pull()

genus_summary %>%
    mutate(genus=factor(genus,levels=rev(genus_summary_sort %>% pull(genus)))) %>%
    filter(relabun > 0) %>%
    ggplot(aes(x=relabun, y=genus, group=genus, color=phylum)) +
        scale_color_manual(values=phylum_colors) +
        geom_jitter(alpha=0.3) + 
        facet_grid(.~cryosection)+
        theme_minimal() + 
        labs(y="Family", x="Relative abundance", color="Phylum")

taxon_level="genus"

taxon_list =c("g__Lactobacillus","g__Escherichia","g__Streptococcus","g__Eisenbergiella","g__Mediterraneibacter","g__Caccovicinus")

genus_summary <- genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
  pivot_longer(-genome, names_to = "microsample", values_to = "count") %>% #reduce to minimum number of columns
  left_join(sample_metadata, by = join_by(microsample == microsample)) %>% #append sample metadata
  left_join(quality, by=join_by(microsample==microsample)) %>%
  filter(quality>=5) %>%
  left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
  group_by(microsample,cryosection,!!sym(taxon_level)) %>%
  summarise(relabun=sum(count))
genus_summary %>%
  mutate(individual=substr(cryosection, 1, 4)) %>% 
  filter(genus %in% c("g__Caccovicinus","g__Streptococcus")) %>%
  pivot_wider(names_from=genus,values_from=relabun) %>%
  ggplot(aes(x=g__Caccovicinus, y=g__Streptococcus)) +
    geom_point() +
    geom_smooth(method=lm , color="red", fill="#69b3a2", se=TRUE) +
    facet_grid(~individual)+
    theme_minimal()

genus_summary %>%
  mutate(individual=substr(cryosection, 1, 4)) %>% 
  filter(genus %in% c("g__Lactobacillus","g__Streptococcus")) %>%
  pivot_wider(names_from=genus,values_from=relabun) %>%
  ggplot(aes(x=g__Lactobacillus, y=g__Streptococcus)) +
    geom_point() +
    geom_smooth(method=lm , color="red", fill="#69b3a2", se=TRUE) +
    facet_grid(~individual)+
    theme_minimal()

genus_summary %>%
  mutate(individual=substr(cryosection, 1, 4)) %>% 
  filter(genus %in% c("g__Lactobacillus","g__Mediterraneibacter")) %>%
  pivot_wider(names_from=genus,values_from=relabun) %>%
  ggplot(aes(x=g__Mediterraneibacter, y=g__Lactobacillus)) +
    geom_point() +
    geom_smooth(method=lm , color="red", fill="#69b3a2", se=TRUE) +
    facet_grid(~individual)+
    theme_minimal()

genus_summary %>%
  mutate(individual=substr(cryosection, 1, 4)) %>% 
  filter(genus %in% c("g__Streptococcus","g__Mediterraneibacter")) %>%
  pivot_wider(names_from=genus,values_from=relabun) %>%
  ggplot(aes(x=g__Streptococcus, y=g__Mediterraneibacter)) +
    geom_point() +
    geom_smooth(method=lm , color="red", fill="#69b3a2", se=TRUE) +
    facet_grid(~individual)+
    theme_minimal()

genus_summary %>%
  mutate(individual=substr(cryosection, 1, 4)) %>% 
  filter(genus %in% c("g__Lactobacillus","g__Caccovicinus")) %>%
  pivot_wider(names_from=genus,values_from=relabun) %>%
  ggplot(aes(x=g__Caccovicinus, y=g__Lactobacillus)) +
    geom_point() +
    geom_smooth(method=lm , color="red", fill="#69b3a2", se=TRUE) +
    facet_grid(~individual)+
    theme_minimal()

genus_summary %>%
  mutate(individual=substr(cryosection, 1, 4)) %>% 
  filter(genus %in% c("g__Lactobacillus","g__Escherichia")) %>%
  pivot_wider(names_from=genus,values_from=relabun) %>%
  ggplot(aes(x=g__Escherichia, y=g__Lactobacillus)) +
    geom_point() +
    geom_smooth(method=lm , color="red", fill="#69b3a2", se=TRUE) +
    facet_grid(~individual)+
    theme_minimal()

genus_summary %>%
  mutate(individual=substr(cryosection, 1, 4)) %>% 
  filter(genus %in% c("g__Streptococcus","g__Caccovicinus")) %>%
  pivot_wider(names_from=genus,values_from=relabun) %>%
  ggplot(aes(x=g__Streptococcus, y=g__Caccovicinus)) +
    geom_point() +
    geom_smooth(method=lm , color="red", fill="#69b3a2", se=TRUE) +
    facet_grid(~individual)+
    theme_minimal()